#######################
# set working directory
# load data
# and load libraries
#######################

# remove objects
rm(list=ls())
# detach all libraries
detachAllPackages <- function() {
  basic.packages <- c("package:stats","package:graphics","package:grDevices","package:utils","package:datasets","package:methods","package:base")
  package.list <- search()[ifelse(unlist(gregexpr("package:",search()))==1,TRUE,FALSE)]
  package.list <- setdiff(package.list,basic.packages)
  if (length(package.list)>0)  for (package in package.list) detach(package, character.only=TRUE)
}
detachAllPackages()

# load libraries
pkgTest <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg)) 
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
  sink()
}

lapply(c("ggplot2", "sampleSelection", "tidyr", "magrittr", "dplyr", "stargazer", "gridExtra",
         "reshape2", "texreg", "interflex", "Zelig", "MASS", "margins", "plyr", "countrycode"), pkgTest)
install.packages('https://cran.r-project.org/src/contrib/Archive/ZeligChoice/ZeligChoice_0.9-6.tar.gz', repos = NULL, type = 'source')
library(ZeligChoice)
# set working directory to parent replication folder
# this shouldn't be impacted where you downloaded 
# the replication files
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# load data
finalData <- read.csv("../data/finalData.csv", stringsAsFactors = T)
finalData$NY.GDP.MKTP.CD <- log(finalData$NY.GDP.MKTP.CD)
finalData$demoBin <- relevel(finalData$demoBin, ref="autocracy")

############
# Figure A.1
############

combinedData <- finalData[, c("year", "country1", "country2", "polity2", "polconiii", "ICSID",
              "NY.GDP.MKTP.CD","NY.GDP.MKTP.KD.ZG", "NE.TRD.GNFS.ZS","BX.KLT.DINV.WD.GD.ZS","NY.GDP.TOTL.RT.ZS",
              "timeUntilAnyElec", "demoBin", "anyViolations", "violations", "treatyExistence")]
combinedData <- combinedData %>% drop_na()
combinedData <- melt(combinedData)
levels(combinedData$variable) <- c("Year",  "Polity 2", "Political Constraints", "log(GDP)",
                                      "GDP change", "Trade (% of GDP)", "FDI net inflows (% of GDP)",
                                       "Nat. resources (% of GDP)", "Time to election", "Any BIT violation", "BIT violations", "Treaty existence")
combinedData$variable <- factor(combinedData$variable, levels=rev(levels(combinedData$variable)))

'%!in%' <- function(x,y)!('%in%'(x,y))

pdf('../figures/figSM1.pdf')
ggplot(data = combinedData[combinedData$variable %!in% c("Treaty existence", "Any BIT violation", "Year", "Time to election"),], mapping = aes(x = value)) +
  geom_histogram(bins = 10, fill="grey", colour="black") + facet_wrap(~variable, scales = 'free', ncol=3) + labs(x=NULL, y="Count") + theme_bw() + 
  theme(legend.position="none")
dev.off()

############
# Table A.1
############

### logits

# model 1: no election
logitNoElec <- glm(anyViolations ~ polity2 + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                     NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                     NY.GDP.TOTL.RT.ZS,
                   data=finalData, family = "binomial")

logitFull <- glm(anyViolations ~ polity2*timeUntilAnyElec+ polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                   NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                   NY.GDP.TOTL.RT.ZS,
                 data=finalData, family = "binomial")

logitBinNoElec <- glm(anyViolations ~ demoBin + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                        NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                        NY.GDP.TOTL.RT.ZS,
                      data=finalData, family = "binomial")


logitBinFull <- glm(anyViolations ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD + NY.GDP.MKTP.KD.ZG+
                      NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                      NY.GDP.TOTL.RT.ZS,
                    data=finalData, family = "binomial")

print(texreg(list(logitNoElec, logitFull, logitBinNoElec, logitBinFull), stars = c(.001,.01,.05), digits=3), file = "../tables/tabSM1.tex")

####################################
### linear assumption of interaction
####################################

source("interflex.R")

binning3_1 <- inter.binning(Y = "violations", D = "timeUntilAnyElec", X = "polity2", Z = c("polconiii", "NY.GDP.MKTP.CD", "NY.GDP.MKTP.KD.ZG",
                                                                                           "NE.TRD.GNFS.ZS", "BX.KLT.DINV.WD.GD.ZS", "NY.GDP.TOTL.RT.ZS"), 
                            data = finalData, nbins=3,
                            weights = NULL, showgrid = F, theme_classic=T,bin.labs=F,
                            cutoffs = c(-5, 5), na.rm = T,
                            Ylabel = expression(frac(paste(delta[y],"Any Violations | Treaty Exists", sep=""),
                                                     paste(delta[x],"Time to Election", sep=""))),
                            #Ylabel = "Relief\nContribution (log (USD/Millions))", 
                            #Dlabel = "Affectedness\n(Normalized Damage)", 
                            Xlabel="Polity") 

############
# Table A.2
############

# model 1: no lag
logitBinFullLag0 <- glm(anyViolations ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                          NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                          NY.GDP.TOTL.RT.ZS,
                        data=finalData[complete.cases(finalData),], family = "binomial")

finalData$demoBin_lag1 <- relevel(finalData$demoBin_lag1, ref="autocracy")
logitBinFullLag1 <- glm(anyViolations ~ demoBin_lag1*timeUntilAnyElec + polconiii_lag1 +
                          log(NY.GDP.MKTP.CD_lag1)+ NY.GDP.MKTP.KD.ZG_lag1+
                          NE.TRD.GNFS.ZS_lag1 + BX.KLT.DINV.WD.GD.ZS_lag1 +
                          NY.GDP.TOTL.RT.ZS_lag1,
                        data=finalData[complete.cases(finalData),], family = "binomial")

finalData$demoBin_lag2 <- relevel(finalData$demoBin_lag2, ref="autocracy")
logitBinFullLag2 <- glm(anyViolations ~ demoBin_lag2*timeUntilAnyElec + polconiii_lag2 + 
                          log(NY.GDP.MKTP.CD_lag2)+ NY.GDP.MKTP.KD.ZG_lag2+
                          NE.TRD.GNFS.ZS_lag2 + BX.KLT.DINV.WD.GD.ZS_lag2 +
                          NY.GDP.TOTL.RT.ZS_lag2,
                        data=finalData[complete.cases(finalData),], family = "binomial")

finalData$demoBin_lag3 <- relevel(finalData$demoBin_lag3, ref="autocracy")
logitBinFullLag3 <- glm(anyViolations ~ demoBin_lag3*timeUntilAnyElec+ polconiii_lag3 + 
                          log(NY.GDP.MKTP.CD_lag3)+ NY.GDP.MKTP.KD.ZG_lag3+
                          NE.TRD.GNFS.ZS_lag3 + BX.KLT.DINV.WD.GD.ZS_lag3 +
                          NY.GDP.TOTL.RT.ZS_lag3,
                        data=finalData[complete.cases(finalData),], family = "binomial")

print(texreg(list(logitBinFullLag0, logitBinFullLag1, logitBinFullLag2, logitBinFullLag3), stars = c(.001,.01,.05), digits=3), file = "../tables/tabSM2.tex")

############
# Table A.3
############

### rare events logit

relogitNoElec <- zelig(anyViolations ~ polity2 + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                         NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                         NY.GDP.TOTL.RT.ZS,
                       data=finalData, model = "relogit")

relogitFull <- zelig(anyViolations ~ polity2*timeUntilAnyElec+ polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                       NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                       NY.GDP.TOTL.RT.ZS,
                     data=finalData, model = "relogit")

relogitBinNoElec <- zelig(anyViolations ~ demoBin + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                            NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                            NY.GDP.TOTL.RT.ZS,
                          data=finalData, model = "relogit")

relogitBinFull <- zelig(anyViolations ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                          NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                          NY.GDP.TOTL.RT.ZS,
                        data=finalData, model = "relogit")

print(texreg(list(relogitNoElec, relogitFull, relogitBinNoElec, relogitBinFull), digits = 3), file = "../tables/tabSM3.tex")

############
# Table A.4
############

### ordered logit

orderlogitNoElec <- zelig(as.factor(finalData$violations) ~ polity2 + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                         NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                         NY.GDP.TOTL.RT.ZS,
                       data=finalData, model = "ologit")

orderlogitFull <- zelig(as.factor(finalData$violations) ~ polity2*timeUntilAnyElec+ polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                       NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                       NY.GDP.TOTL.RT.ZS,
                     data=finalData, model = "ologit")

orderlogitBinNoElec <- zelig(as.factor(finalData$violations) ~ demoBin + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                            NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                            NY.GDP.TOTL.RT.ZS,
                          data=finalData, model = "ologit")

orderlogitBinFull <- zelig(as.factor(finalData$violations) ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                          NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                          NY.GDP.TOTL.RT.ZS,
                        data=finalData, model = "ologit")

print(texreg(list(orderlogitNoElec, orderlogitFull, orderlogitBinNoElec, orderlogitBinFull), digits = 3), file = "../tables/tabSM4.tex")

############
# Table A.5
############

### negative binomial

negBinomNoElec <- glm.nb(violations ~ polity2 + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                            NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                            NY.GDP.TOTL.RT.ZS, data=finalData)

negBinomFull <- glm.nb(violations ~ polity2*timeUntilAnyElec+ polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                          NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                         NY.GDP.TOTL.RT.ZS, data=finalData)


negBinomBinNoElec <- glm.nb(violations ~ demoBin + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                               NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                              NY.GDP.TOTL.RT.ZS, data=finalData)

negBinomBinFull <- glm.nb(violations ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                             NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                            NY.GDP.TOTL.RT.ZS, data=finalData)

print(texreg(list(negBinomNoElec, negBinomFull, negBinomBinNoElec, negBinomBinFull), digits = 3), file = "../tables/tabSM5.tex")

############
# Table A.6
############

### selection models

# create base selection equation starting w/
# (1) affectedness
selectEq <- treatyExistence ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
  NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
  NY.GDP.TOTL.RT.ZS

selEqnModel <- glm(selectEq, family=binomial(link="probit"), data=finalData)

outcomeEq <- anyViolations ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
  NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
  NY.GDP.TOTL.RT.ZS 

outEqnModel <- selection(selectEq, outcomeEq, data = finalData, method = "2step")

print(texreg(list(outEqnModel), digits = 3), file = "../tables/tabSM6.tex")

############
# Table A.7
############

lapply(c("countrycode", "plyr",
         "margins", "cplot"), pkgTest)

detach("package:dplyr")


# load data
isdsData <- read.csv("../data/wellhausenData.csv", stringsAsFactors = T)
names(isdsData)[c(1,3, 4)] <- c("year", "iso3c", "country2iso")
isdsData$year <- ifelse(isdsData$year=="unknown", NA, as.numeric(as.character(isdsData$year)))
isdsData$concluded..Arbitration.concluded. <- revalue(isdsData$concluded..Arbitration.concluded., 
                                                      c("90" = "no", "Approximately US$248m, plus interest" = "yes"))
isdsData$winner..Status.winner.pre.annulment. <- revalue(isdsData$winner..Status.winner.pre.annulment., 
                                                         c("248" = "no", "settlement (US$357m)" = "settlement"))

isdsData$violations <- 1
isdsData$ended <- ifelse(isdsData$concluded..Arbitration.concluded.=="yes", 1, 0)
isdsData$stateLost <- ifelse(isdsData$winner..Status.winner.pre.annulment.=="investor" | isdsData$winner..Status.winner.pre.annulment.=="settlement", 1, 0)

isdsCountData <- aggregate(violations ~ year + iso3c + country2iso, isdsData, sum)
isdsCountData <- merge(aggregate(ended ~ year + iso3c + country2iso, isdsData, sum), isdsCountData, by=c("year", "iso3c", "country2iso"))
isdsCountData <- merge(aggregate(stateLost ~ year + iso3c + country2iso, isdsData, sum), isdsCountData, by=c("year", "iso3c", "country2iso"))

isdsCountData$country2iso <- revalue(isdsCountData$country2iso, c("BELLUX"="BEL"))
leftOversCountry2 <- isdsCountData[which(isdsCountData$country2iso=="BEL"),]
leftOversCountry2$country2iso <- ifelse(leftOversCountry2$country2iso=="BEL",
                                        "LUX", leftOversCountry1$country2iso)
isdsCountData <- rbind(isdsCountData, leftOversCountry2)
isdsCountData$anyViolations <- ifelse(isdsCountData$violations>0, 1, 0)

# combine with existing data

# load data
totalData <- finalData
totalData <- rename(totalData, c("anyViolations"="ourViolations"))

totalData <- totalData[ , -(grep("violations", names(totalData)))]

totalData$country2iso <- countrycode(totalData$country2, "country.name", "iso3c")
totalDataComplete <- merge(totalData, isdsCountData, by=c("year", "iso3c", "country2iso"), all.x = T)
totalDataComplete[, c("anyViolations", "violations", "stateLost", "ended")][is.na(totalDataComplete[, c("anyViolations", "violations", "stateLost", "ended")])] <- 0

totalDataCompleteCases <- totalDataComplete[, c("year", "country1", "country2", "polity2", "polconiii", "treatyExistence", "ourViolations",
                                        "NY.GDP.MKTP.CD","NY.GDP.MKTP.KD.ZG", "NE.TRD.GNFS.ZS","BX.KLT.DINV.WD.GD.ZS","NY.GDP.TOTL.RT.ZS",
                                        "timeUntilAnyElec", "demoBin", "anyViolations", "violations", "stateLost", "ended")]

totalDataCompleteCases <- totalDataCompleteCases %>% drop_na()

# model 1: no election
logitNoElecisds <- glm(anyViolations ~ polity2 + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                     NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                     NY.GDP.TOTL.RT.ZS,
                   data=totalDataCompleteCases, family = "binomial")

logitFullisds <- glm(anyViolations ~ polity2*timeUntilAnyElec+ polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                   NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                   NY.GDP.TOTL.RT.ZS,
                 data=totalDataCompleteCases, family = "binomial")

logitBinNoElecisds <- glm(anyViolations ~ demoBin + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                        NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                        NY.GDP.TOTL.RT.ZS,
                      data=totalDataCompleteCases, family = "binomial")


logitBinFullisds <- glm(anyViolations ~ demoBin*timeUntilAnyElec + polconiii + NY.GDP.MKTP.CD+ NY.GDP.MKTP.KD.ZG+
                      NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                      NY.GDP.TOTL.RT.ZS,
                    data=totalDataCompleteCases, family = "binomial")


print(texreg(list(logitNoElecisds, logitFullisds, logitBinNoElecisds, logitBinFullisds), stars = c(.001,.01,.05), digits = 3), file = "../tables/tabSM7.tex")

############
# Figure A.2
############

# marginal effects (i.e., the marginal contribution of each variable on the scale of the linear predictor) 
interactPlotDemoME1 <- as.data.frame(cplot(logitFullisds, x = "timeUntilAnyElec", dx = "polity2", what = "effect"))
interactPlotDemoME1$yvals <- as.numeric(interactPlotDemoME1$yvals)

pdf("../figures/figSM2a.pdf")
ggplot(interactPlotDemoME1,  aes(x=xvals, y=yvals)) + geom_line(aes(y=yvals)) +
  geom_line(aes(y=upper), color='black', linetype = 4, size=1.25) + 
  geom_line(aes(y=lower), color='black', linetype = 4, size=1.25) + 
  geom_hline(yintercept =0, linetype=2) +
  lims(x=c(-4,0.5), y=c(-0.00015, .0005))+
  labs(x="Time until election (years)", y="Marginal effect of level of democracy\non Pr(violation=1)") + 
  theme_classic() + theme(legend.position = "none", legend.text=element_text(size=20),
                          legend.title=element_text(size=25),
                          axis.text=element_text(size=24), axis.title=element_text(size=28))
dev.off()


interactPlotDemoME <- as.data.frame(cplot(logitFullisds, x = "polity2", dx = "timeUntilAnyElec", what = "effect"))
interactPlotDemoME$yvals <- as.numeric(interactPlotDemoME$yvals)

pdf("../figures/figSM2b.pdf")
ggplot(interactPlotDemoME,  aes(x=xvals, y=yvals)) + geom_line(aes(y=yvals)) +
  geom_line(aes(y=upper), color='black', linetype = 4, size=1.25) + 
  geom_line(aes(y=lower), color='black', linetype = 4, size=1.25) + 
  geom_hline(yintercept =0, linetype=2) +
  labs(x="Polity", y="Marginal effect of time until election\non Pr(violation=1)") + 
  theme_classic() + theme(legend.position = "none", legend.text=element_text(size=20),
                          legend.title=element_text(size=25),
                          axis.text=element_text(size=24), axis.title=element_text(size=28))

dev.off()

############
# Table A.8
############

# model 1: no election
percentLostNoElec <- glm(cbind(stateLost, violations) ~ polity2 + polconiii + log(NY.GDP.MKTP.CD)+ NY.GDP.MKTP.KD.ZG+
                           NE.TRD.GNFS.ZS + BX.KLT.DINV.WD.GD.ZS +
                           NY.GDP.TOTL.RT.ZS,
                         data=totalDataCompleteCases[totalDataCompleteCases$violations>0,], family = "binomial")

print(texreg(list(percentLostNoElec), stars = c(.001,.01,.05), digits = 3), file = "../tables/tabSM8.tex")

#############
# Figure A.3
# and A.4
#############

# retain 0.8992784 of data (~ -11% of cases dropped due to missingness)
tempData <- finalData[which(!is.na(finalData[, !grepl("lag", names(finalData ))])),  !grepl("lag", names(finalData ))]
NAdata <- tempData[!complete.cases(tempData[, !(colnames(tempData) %in% c("violations", "anyViolations", "SignDate", "EntryDate", "ICSID", "time.treaty")), drop = FALSE]),]

tempData$NAinCase <- do.call(paste0, tempData) %in% do.call(paste0, NAdata)
tempData$NAinCase <- ifelse(tempData$NAinCase=="TRUE", 0, 1)
source("displayTreatFig.R")
plot1_25 <- displayTreatFig(unit.id = "iso3c", 
                            time.id = "year", legend.position = "bottom", title = "",
                            xlab = "\nYear", ylab = "Country\n", y.size = 8, x.size = 10,
                            treatment = "NAinCase", data = tempData[which(tempData$iso3c %in% unique(tempData$iso3c)[1:25]),])
plot26_50 <- displayTreatFig(unit.id = "iso3c", 
                             # title: Distribution of Government Religious Affiliation Across Country and Time
                             time.id = "year", legend.position = "bottom", title = "",
                             xlab = "\nYear", ylab = "Country\n", y.size = 8, x.size = 10,
                             treatment = "NAinCase", data = tempData[which(tempData$iso3c %in% unique(tempData$iso3c)[26:50]),])
plot51_75 <- displayTreatFig(unit.id = "iso3c", 
                             # title: Distribution of Government Religious Affiliation Across Country and Time
                             time.id = "year", legend.position = "bottom", title = "",
                             xlab = "\nYear", ylab = "Country\n", y.size = 8, x.size = 10,
                             treatment = "NAinCase", data = tempData[which(tempData$iso3c %in% unique(tempData$iso3c)[51:75]),])
plot76_100 <- displayTreatFig(unit.id = "iso3c", 
                              # title: Distribution of Government Religious Affiliation Across Country and Time
                              time.id = "year", legend.position = "bottom", title = "",
                              xlab = "\nYear", ylab = "Country\n", y.size = 8, x.size = 10,
                              treatment = "NAinCase", data = tempData[which(tempData$iso3c %in% unique(tempData$iso3c)[76:100]),])

plot101_125 <- displayTreatFig(unit.id = "iso3c", 
                               # title: Distribution of Government Religious Affiliation Across Country and Time
                               time.id = "year", legend.position = "bottom", title = "",
                               xlab = "\nYear", ylab = "Country\n", y.size = 8, x.size = 10,
                               treatment = "NAinCase", data = tempData[which(tempData$iso3c %in% unique(tempData$iso3c)[101:125]),])
plot126_150 <- displayTreatFig(unit.id = "iso3c", 
                               # title: Distribution of Government Religious Affiliation Across Country and Time
                               time.id = "year", legend.position = "bottom", title = "",
                               xlab = "\nYear", ylab = "Country\n", y.size = 8, x.size = 10,
                               treatment = "NAinCase", data = tempData[which(tempData$iso3c %in% unique(tempData$iso3c)[126:150]),])
plot151_ <- displayTreatFig(unit.id = "iso3c", 
                            # title: Distribution of Government Religious Affiliation Across Country and Time
                            time.id = "year", legend.position = "bottom", title = "",
                            xlab = "\nYear", ylab = "Country\n", y.size = 8, x.size = 10,
                            treatment = "NAinCase", data = tempData[which(tempData$iso3c %in% unique(tempData$iso3c)[151:length(unique(tempData$iso3c))]),])


g_legend <- function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}

pdf("../figures/figSM3.pdf", height=15, width=10)
grid.arrange(arrangeGrob(plot1_25 + theme(legend.position="none"), plot26_50 + theme(legend.position="none"),
                         plot51_75 + theme(legend.position="none"), plot76_100 + theme(legend.position="none"),
                         nrow=2), g_legend(plot1_25), nrow=2, heights=c(15, 1)) 
dev.off()

pdf("../figures/figSM4.pdf", height=15, width=10)
grid.arrange(arrangeGrob(plot101_125 + theme(legend.position="none"),
                         plot126_150 + theme(legend.position="none"), plot151_ + theme(legend.position="none"),
                         nrow=2), g_legend(plot1_25), nrow=2, heights=c(15, 2)) 
dev.off()

#############
# Figure A.5
#############

# create plots of subsamples used in regression analyses
load('../../data/totalDataLag0_dropped.Rdata'); outputDataFrame$NY.GDP.MKTP.CD <- log(outputDataFrame$NY.GDP.MKTP.CD); totalDataLag0_dropped <- outputDataFrame
load('../../data/totalDataLag1_dropped.Rdata'); outputDataFrame$NY.GDP.MKTP.CD_lag1 <- log(outputDataFrame$NY.GDP.MKTP.CD_lag1); totalDataLag1_dropped <- outputDataFrame
load('../../data/totalDataLag2_dropped.Rdata'); outputDataFrame$NY.GDP.MKTP.CD_lag2 <- log(outputDataFrame$NY.GDP.MKTP.CD_lag2); totalDataLag2_dropped <- outputDataFrame
load('../../data/totalDataLag5_dropped.Rdata'); outputDataFrame$NY.GDP.MKTP.CD_lag5 <- log(outputDataFrame$NY.GDP.MKTP.CD_lag5); totalDataLag5_dropped <- outputDataFrame

createSamplePlots <- function(lag=0, outputDataFrame){
  load(paste('../../data/totalDataLag', as.character(lag), '_dropped.Rdata', sep=""))
  plotData <- get(grep(paste('totalDataLag', as.character(lag), '_dropped', sep=""), names(which(unlist(eapply(.GlobalEnv,is.data.frame)))), value=T))
  if(lag==0){
    noLagVars <- c("polity2", "polconiii", "NY.GDP.MKTP.CD",
                   "NY.GDP.MKTP.KD.ZG", "NE.TRD.GNFS.ZS", "BX.KLT.DINV.WD.GD.ZS", "NY.GDP.TOTL.RT.ZS")
    plotData <- plotData[complete.cases(plotData[, noLagVars]), c("year", "violations", noLagVars)]
  }
  if(lag>0){
    lagVars <- grep(paste("_lag", as.character(lag), sep=""), names(plotData), value=T)[-2]
    plotData <- plotData[complete.cases(plotData[, lagVars]), c("year", "violations", lagVars),]
    
  }
  combinedData <- melt(as.data.frame(plotData), id="year")
  combinedData$sample <- as.factor(paste("Lag", as.character(lag), sep=""))
  levels(combinedData$variable) <- c("BIT violations", "Polity 2", "Political constraints", "log(GDP)", "GDP change",
                                     "Trade (% of GDP)","FDI inflows (% of GDP)", "Resources (% of GDP)")
  assign(paste("combinedData", as.character(lag), sep=""), combinedData, envir = .GlobalEnv)
}
createSamplePlots(lag = 0)
createSamplePlots(lag = 1)
createSamplePlots(lag = 2)
createSamplePlots(lag = 5)
combinedData <- rbind(combinedData0, combinedData1, combinedData2, combinedData5)
levels(combinedData$sample) <- c("No lag", "Lag 1", "Lag 2", "Lag 5")

pdf('../figures/figSM5.pdf')
ggplot(data = combinedData, mapping = aes(x = value, fill=sample)) + scale_fill_grey() +
  geom_histogram(aes(y = (..count..)), position="dodge", bins=10) + facet_wrap(~variable, scales = 'free') + 
  labs(x=NULL, y="Count", fill = "Sample") + theme_bw() + theme(legend.position = c(0.85, 0.15), legend.text=element_text(size=10),
                                                                legend.title=element_text(size=14), strip.text=element_text(size=10),
                                                                axis.text=element_text(size=10), axis.title=element_text(size=14))
dev.off()
